1 module dataframe.typed;
2 import dataframe.dataframe;
3 import std.conv;
4 import std.csv;
5 import std.datetime;
6 import std.exception;
7 import std.range:array, stride,only;
8 import std.stdio;
9 import std.variant;
10 import std.string:isNumeric;
11 import std.typecons:tuple,Tuple;
12 import std.traits;
13 import std.file;
14 
/**
 * Build a typed frame from a CSV file.
 *
 * The file is read twice: first into an untyped `DataFrame`, whose
 * `findColumnTypes` result supplies the column types, and then into a
 * `DataFrameTyped` configured with those types.
 *
 * Params:
 *   filename = path of the CSV file
 *   titles   = column titles to use; when empty, the titles come from the
 *              untyped frame (`indexTitle` followed by `columnTitles`) and the
 *              typed load is asked to skip the first line
 *
 * Returns: the populated typed frame
 */
DataFrameTyped typedFrameFromCSV(string filename, string[] titles)
{
	immutable bool titlesSupplied = titles.length > 0;

	// First pass: untyped load, used for its titles and detected column types.
	DataFrame untyped;
	untyped = untyped.loadCSVFile(filename, titlesSupplied);

	auto headings = titlesSupplied
		? titles
		: untyped.indexTitle ~ untyped.columnTitles;

	// Second pass: typed load using the titles and types found above.
	DataFrameTyped result;
	result = result.setColumnTitles(headings)
			.setColumnTypes(untyped.findColumnTypes)
			.loadCSVFile(filename, headings, !titlesSupplied);
	return result;
}
28 
/**
 * Handle to a single row of a `DataFrameTyped`.
 *
 * Stores a pointer to the owning frame together with a row number. Cells are
 * reached through `opDispatch`: `row.price` reads column "price" and
 * `row.price(3.5)` writes it.
 */
struct DataFrameTypedRow
{
	DataFrameTyped *frame;
	size_t rowNumber;

/*	auto opIndexAssign(T)(T value, string colName)
	{
		import std.algorithm:canFind;
		enforce(frame.columnTitles.canFind(colName));
		(*frame)[rowNumber,colName]=value;
		return this;
	}
*/
	/**
	 * Read the cell in column `colName` of this row.
	 *
	 * Throws: Exception when the row is not attached to a frame or the frame
	 *         has no column called `colName`.
	 *
	 * NOTE(review): `KalVariant` is not declared in this file and none of the
	 * `loadCell` overloads visible here accept it - confirm where it comes from.
	 */
	auto opDispatch(string colName)()
	{
		import std.algorithm:canFind;
		enforce(frame !is null,"DataFrameTypedRow is not attached to a frame");
		enforce(frame.columnTitles.canFind(colName),"no such column: "~colName);
		return (*frame).loadCell!KalVariant(rowNumber,colName);
	}

	/**
	 * Write `value` into column `colName` of this row through the frame's
	 * index-assignment operator.
	 *
	 * Throws: Exception when the row is not attached to a frame or the frame
	 *         has no column called `colName`.
	 */
	void opDispatch(string colName,T)(T value)
	{
		import std.algorithm:canFind;
		enforce(frame !is null,"DataFrameTypedRow is not attached to a frame");
		enforce(frame.columnTitles.canFind(colName),"no such column: "~colName);
		(*frame)[rowNumber,colName]=value;
	}
/*	T loadCell(T)(string series)
	{
		return (*frame).loadCell!T(rowNumber,series);
	}*/
}
59 
60 
61 struct DataFrameTyped
62 {
63 	string title;
64 	string[] columnTitles;
65 	ColumnType[string] columnTypes;
66 
67 	char separator=',';
68 	char quote='\"';
69 	size_t numRows;
70 	struct Values
71 	{
72 		double[][string] doubles;
73 		int[][string] ints;
74 		long[][string] longs;
75 		std.datetime.Date[][string] dates;
76 		std.datetime.DateTime[][string] dateTimes;
77 		string[][string] strings;
78 	}
79 	Values values;
80 	size_t[string] stringSizes;
81 
82 	
83 	auto setRows(size_t rows)
84 	{
85 		this.length=rows;
86 		return this;
87 	}
88 
89 	auto insertColumn(T)(string title,ColumnType type,T[] vals)
90 	{
91 		this.columnTitles~=title;
92 		this.columnTypes[title]=type;
93 		final switch(type) with(ColumnType)
94 		{
95 			case Double:
96 				values.doubles[title]=vals;
97 				break;
98 			case Int:
99 				values.ints[title]=vals;
100 				break;
101 			case Long:
102 				values.longs[title]=vals;
103 				break;
104 			case Date:
105 				values.dates[title]=vals;
106 				break;
107 			case DateTime:
108 				values.dateTimes[title]=vals;
109 				break;
110 		}
111 		return this;
112 	}
113 
114 	auto deleteColumn(string title)
115 	{
116 		import std.algorithm:countUntil;
117 		auto i=columnTitles.countUntil(title);
118 		enforce(i>=0);
119 		final switch(columnTypes[title]) with(ColumnType)
120 		{
121 			case Double:
122 				values.doubles.remove(title);
123 				break;
124 			case Int:
125 				values.ints.remove(title);
126 				break;
127 			case Long:
128 				values.longs.remove(title);
129 				break;
130 			case Date:
131 				values.dates.remove(title);
132 				break;
133 			case DateTime:
134 				values.dateTimes.remove(title);
135 				break;
136 			case String:
137 				values.strings.remove(title);
138 				break;
139 		}
140 		columnTypes.remove(title);
141 		if (i==0)
142 			columnTitles=columnTitles[1..$];
143 		else if (i==columnTitles.length)
144 			columnTitles=columnTitles[0..$-1];
145 		else
146 			columnTitles=columnTitles[0..i]~columnTitles[i+1..$];
147 		return this;
148 	}
149 	void mergeCell(DataFrameTyped frame, string series, size_t rowNum)
150 	{
151 		final switch(frame.columnTypes[series]) with(ColumnType)
152 		{
153 			case Double:
154 				values.doubles[series]~=frame.values.doubles[series][rowNum];
155 				return;
156 			case Int:
157 				values.ints[series]~=frame.values.ints[series][rowNum];
158 				return;
159 			case Long:
160 				values.longs[series]~=frame.values.longs[series][rowNum];
161 				return;
162 			case Date:
163 				values.dates[series]~=frame.values.dates[series][rowNum];
164 				return;
165 			case DateTime:
166 				values.dateTimes[series]~=frame.values.dateTimes[series][rowNum];
167 				return;
168 			case String:
169 				values.strings[series]~=frame.values.strings[series][rowNum];
170 				return;
171 		}
172 
173 	}
174 	void appendCell(T)(string series, T value)
175 	{
176 		final switch(columnTypes[series]) with(ColumnType)
177 		{
178 			case Double:
179 				values.doubles[series]~=value.to!double;
180 			case Int:
181 				values.ints[series]~=value.to!int;
182 			case Long:
183 				values.longs[series]~=value.to!long;
184 			case Date:
185 				values.dates[series]~=value.to!Date;
186 			case DateTime:
187 				values.dateTimes[series]~=value.to!DateTime;
188 			case String:
189 				values.strings[series]~=value.to!string;
190 		}
191 	}
192 
193 	T loadCell(T)(size_t row, string series)
194 	if(std.traits.isNumeric!T)
195 	{
196 		final switch(columnTypes[series]) with(ColumnType)
197 		{
198 			case Double:
199 				return values.doubles[series][row].to!T;
200 			case Int:
201 				return values.ints[series][row].to!T;
202 			case Long:
203 				return values.longs[series][row].to!T;
204 			case Date,DateTime:
205 				throw new Exception("cannot convert date/datetime to numeric type "~T.stringof);
206 			case String:
207 				throw new Exception("cannot convert string to numeric type "~T.stringof);
208 		}
209 		assert(0);
210 		//return (loadCell!(T[])(series,row,row+1))[0];
211 	}
212 	T loadCell(T)(size_t row, string series)
213 	if(is(T==DateTime) || is(T==Date))
214 	{
215 		final switch(columnTypes[series]) with(ColumnType)
216 		{
217 			case Double,Int,Long:
218 				throw new Exception("cannot convert number to date type");
219 			case Date:
220 				return values.dates[series][row].to!T;
221 			case DateTime:
222 				return values.dateTimes[series][row].to!T;
223 			case String:
224 				return cast(std.datetime.DateTime)SysTime.fromSimpleString(values.strings[series][row]);
225 		}
226 		assert(0);
227 		//return (loadCell!(T[])(series,row,row+1))[0];
228 	}
229 	T loadCell(T)(size_t row, string series)
230 	if(is(T==string))
231 	{
232 		final switch(columnTypes[series]) with(ColumnType)
233 		{
234 			case Double:
235 				return values.doubles[series][row].to!T;
236 			case Int:
237 				return values.ints[series][row].to!T;
238 			case Long:
239 				return values.longs[series][row].to!T;
240 			case Date:
241 				return values.dates[series][row].to!T;
242 			case DateTime:
243 				return values.dateTimes[series][row].to!T;
244 			case String:
245 				return values.strings[series][row];
246 		}
247 		assert(0);
248 		//return (loadCell!(T[])(series,row,row+1))[0];
249 	}
250 /*
251 	T loadCell(T)(string series, size_t start, size_t end)
252 	{
253 		final switch(columnTypes[series]) with(ColumnType)
254 		{
255 			case Double:
256 				return values.doubles[series][start..end].to!T;
257 			case Int:
258 				return values.ints[series][start..end].to!T;
259 			case Long:
260 				return values.longs[series][start..end].to!T;
261 			case Date:
262 				return values.dates[series][start..end].to!T;
263 			case DateTime:
264 				return values.dateTimes[series][start..end].to!T;
265 			case String:
266 				return values.strings[series][start..end].to!T;
267 		}
268 	}
269 */
270 	DataFrameTyped setTitle(string title)
271 	{
272 		this.title=title;
273 		return this;
274 	}
275 	DataFrameTyped setColumnTitles(string[] titles)
276 	{
277 		this.columnTitles=titles;
278 		return this;
279 	}
280 	DataFrameTyped setColumnTypes(ColumnType[] columnTypes)
281 	{
282 		foreach(i,title;columnTitles)
283 			this.columnTypes[title]=columnTypes[i];
284 		enforce(this.columnTitles.length==this.columnTypes.keys.length);
285 		return this;
286 	}
287 
288 	size_t length() @property
289 	{
290 		return this.numRows;
291 	}
292 
293 	void length(size_t rows) @property
294 	{
295 		if (rows==this.numRows)
296 			return;
297 		foreach(col;columnTitles)
298 		{
299 			final switch(columnTypes[col]) with(ColumnType)
300 			{
301 				case Double:
302 					this.values.doubles[col].length=rows;
303 					break;
304 				case Int:
305 					this.values.ints[col].length=rows;
306 					break;
307 				case Long:
308 					this.values.longs[col].length=rows;
309 					break;
310 				case Date:
311 					this.values.dates[col].length=rows;
312 					break;
313 				case DateTime:
314 					this.values.dateTimes[col].length=rows;
315 					break;
316 				case String:
317 					this.values.strings[col].length=rows;
318 					break;
319 			}
320 		}
321 		this.numRows=rows;
322 	}
323 
324 	DataFrameTyped setIndexValues(T)(T[] indexValues)
325 	{
326 		this.length=indexValues.length;
327 		final switch(this.indexType) with(ColumnType)
328 		{
329 			case Double:
330 				foreach(i,value;indexValues)
331 					this.values.doubles[i*numDoubleCols]=value;
332 				break;
333 			case Int:
334 				foreach(i,value;indexValues)
335 					this.values.ints[i*numIntCols]=value;
336 				break;
337 			case Long:
338 				foreach(i,value;indexValues)
339 					this.values.longs[i*numLongCols]=value;
340 				break;
341 			case String:
342 				foreach(i,value;indexValues)
343 					this.values.strings[i*numStringCols]=value;
344 				break;
345 			case Date:
346 				foreach(i,value;indexValues)
347 					this.values.dates[i*numDateCols]=value;
348 				break;
349 			case DateTime:
350 				foreach(i,value;indexValues)
351 					this.values.datetimes[i*numDateTimeCols]=value;
352 				break;
353 		}
354 		foreach(i,value;indexValues)
355 			this.indexValues[i]=indexValues;
356 		return this;
357 	}
358 
359 /*	DataFrameTyped setCellValues(KalVariant[][] cellValues)
360 	{
361 		foreach(i,row;cellValues)
362 		{
363 			foreach(j,cell;row)
364 			{
365 				this[i,j+1]=cell;
366 			}
367 		}
368 		return this;
369 	}
370 	DataFrameTyped setAllValues(KalVariant[][] values)
371 	{
372 		foreach(i,row;values)
373 		{
374 			this.indexValues[i]=values[i][0];
375 			foreach(j,cell;row[1..$])
376 			{
377 				this[i,j+1]=cell;
378 			}
379 		}
380 		return this;
381 	}
382 */
383 	DataFrameTyped loadCSVFile(string csv, string[] columnTitles=[],bool skipFirst=false)
384 	{
385 		auto file=std.file.read(csv);
386 		return loadCSV(cast(string) file,columnTitles,skipFirst);
387 	}
388 
389 	DataFrameTyped setSeparator(char separator)
390 	{
391 		this.separator=separator;
392 		return this;
393 	}
394 	DataFrameTyped setQuote(char separator)
395 	{
396 		this.quote=separator;
397 		return this;
398 	}
399 
400 	DataFrameTyped mergeFrames(DataFrameTyped frame)
401 	{
402 		enforce((frame.title=="") || (frame.title==this.title) || (this.title==""));
403 		enforce(frame.columnTitles.length==0 || frame.columnTitles==this.columnTitles || this.columnTitles.length==0);
404 		enforce(frame.columnTypes==this.columnTypes);
405 		foreach(colTitle;frame.columnTitles)
406 		{
407 			foreach(rowNum;0..frame.numRows)
408 				mergeCell(frame,colTitle,rowNum);
409 		}
410 		this.numRows+=frame.numRows;
411 		// should do sort and uniq
412 		return this;
413 	}
414 
415 	size_t numCols()
416 	{
417 		return columnTypes.length;
418 	}
419 
420 	auto opIndex(size_t row)
421 	{
422 		return DataFrameTypedRow(&this,row);
423 	}
424 	/*
425 	auto opIndexAssign(DataFrameTypedRow rowData,size_t rowNumber)
426 	{
427 		foreach(j,col;cols)
428 		{
429 			this[rowNumber,columnTitles[j]]=rowData.frame[]
430 	}
431 	*/
432 	T opIndex(T)(size_t row, size_t col)
433 	{
434 		//enforce((row>=0) && (col>=0) && (col <=numCols) &&(row<=indexValues.length));
435 		return loadCell!T(columnTitles[col],row,row+1);
436 	}
437 
438 	T opIndex(T)(size_t row, string col)
439 	{
440 		return loadCell!T(col,row,row+1);
441 	}
442 
443 	T opIndex(T)(size_t[] rows, size_t[] cols)
444 	{
445 		T[][] ret;
446 		ret.length=rows.length;
447 		foreach(ref line;ret)
448 			line.length=cols.length;
449 		foreach(i,row;rows)
450 		{
451 			foreach(j,col;cols)
452 			{
453 				ret[i][j]=loadCell!T(columnTitles[col],row,row+1);
454 			}
455 		}
456 		return ret;
457 	}
458 
459 	T opIndexAssign(T)(T value, size_t row, size_t col)
460 	{
461 		return opIndexAssign!T(value,row,this.columnTitles[col]);
462 	}
463 
464 	T opIndexAssign(T)(T value, size_t row, string col)
465 	{
466 		//stdout.writefln("opIndexAssign %s,%s,%s",value,row,col);
467 		//stdout.writefln("this.values.strings.keys=%s",this.values.strings.keys);
468 		//stdout.writefln("T=%s",typeid(T));
469 		//stdout.flush;
470 		// enforce type safety for columns
471 		//enforce((row>=0) && (col>=0) && (col <=numCols) &&(row<=indexValues.length));
472 		final switch(columnTypes[col]) with(ColumnType)
473 		{
474 			case Double:
475 				this.values.doubles[col][row]=value.to!double;
476 				return value;
477 			case Int:
478 				this.values.ints[col][row]=value.to!int;
479 				return value;
480 			case Long:
481 				this.values.longs[col][row]=value.to!long;
482 				return value;
483 			case String:
484 				this.values.strings[col][row]=value.to!string;
485 				return value;
486 			case ColumnType.Date:
487 				static if(is(T==std.datetime.Date))
488 					this.values.dates[col][row]=value;
489 				else static if(is(T==std.datetime.DateTime))
490 					this.values.dates[col][row]=value.dateTimeToDate;
491 				else static if(is(T==std.datetime..string))
492 					this.values.dates[col][row]=value.stringToDate;
493 				return value;
494 			case ColumnType.DateTime:
495 				static if(is(T==std.datetime.DateTime))
496 					this.values.dateTimes[col][row]=value;
497 				else static if(is(T==std.datetime.Date))
498 					this.values.dateTimes[col][row]=value.dateToDateTime;
499 				else static if(is(T==std.datetime..string))
500 					this.values.dateTimes[col][row]=value.stringToDateTime;
501 				return value;
502 		}
503 	}
504 
505 	T[] columnValues(T)(string col)
506 	{
507 		final switch(columnTypes[col]) with(ColumnType)
508 		{
509 			case Double:
510 				return this.values.doubles[col];
511 			case Int:
512 				return this.values.ints[col];
513 			case Long:
514 				return this.values.longs[col];
515 			case String:
516 				return this.values.strings[col];
517 			case Date:
518 				return this.values.dates[col];
519 			case DateTime:
520 				return this.values.dateTimes[col];
521 		}
522 	}
523 	T[] columnValues(T)(size_t col)
524 	{
525 		return this.columnValues(this.columnTitles[col]);
526 	}
527 
528 	ColumnType[] findColumnTypes()
529 	{
530 		ColumnType[] ret;
531 		foreach(title;this.columnTitles)
532 			ret~=this.columnTypes[title];
533 		return ret;
534 	}
535 	size_t[] opSlice(size_t i)(size_t start, size_t end)
536 	if ((i==0)||(i==1))
537 	{
538 		return iota(start,end);
539 	}
540 
541 	size_t opDollar(size_t i)()
542 	{
543 		static if (i==0)
544 			return numRows;
545 		else static if(i==1)
546 			return numCols;
547 		else static assert(0);
548 	}
549 
550 	string toString()
551 	{
552 		string ret="Kaleidic Typed Dataframe: "~this.title~"\n\n";
553 
554 		foreach(j;0..numCols)
555 			ret~="\t"~this.columnTitles[j];
556 		ret~="\n";
557 		//log("numRows="~numRows.to!string);
558 		//log("numCols="~numCols.to!string);
559 
560 		foreach(i;0..numRows)
561 		{
562 			//log("row: "~i.to!string~": "~this.indexValues[i].to!string);
563 			foreach(j;columnTitles)
564 				ret~=loadCell!string(i,j)~"\t";
565 			ret~="\n";
566 		}
567 		return ret;
568 	}
569 
570 }
571